### Replication script for Figure 2 and qualitative validation exercise ###
library(tidyverse) # 1.3.2
library(autoMrP) # 1.0.2
library(sf) # 1.0.14
library(fixest) # 0.10.4

## Figure 2
# Load ingredients for MRP and district covariates
load("lb_clean.RData")
load("census_binned.RData")
load("district_covars.RData")

mrp_survey <- lb_clean %>%
  left_join(dplyr::select(district_covars, district_code, luminosity_pc, road_density, log_population, pop_density)) %>%
  dplyr::select(district_code, crim_presence, crim_gov, SEX, AGE, EDATTAIN, UNEMPLOYED, luminosity_pc, road_density, log_population, pop_density) %>%
  drop_na()

# MRP estimation for district-level criminal governance
ml_out <- auto_MrP(
  y = "crim_gov",
  L1.x = c("SEX",  "AGE", "EDATTAIN", "UNEMPLOYED"),
  L2.x = c("road_density", "log_population", "pop_density", "luminosity_pc"),
  L2.unit = "district_code",
  bin.proportion = "proportion",
  survey = mrp_survey,
  census = census_binned,
  pca = FALSE,
  gb = FALSE,
  svm = FALSE,
  lasso = FALSE,
  best.subset = FALSE,
  # Just MRP
  mrp = TRUE,
  uncertainty = FALSE,
  cores = 2)

estimates_gov <- ml_out$classifiers %>%
  rename(crim_gov = mrp) %>%
  left_join(district_covars) %>%
  mutate(pop_density = as.numeric(pop_density))

# Plot
ggplot(estimates_gov) +
  geom_point(aes(x = log_population, y = crim_gov)) + 
  geom_smooth(aes(x = log_population, y = crim_gov), color = "darkred") + 
  geom_vline(xintercept = log(1000000), lty = 3) +
  labs(x = "ln(District population)", y = "MRP estimate of criminal governance rate", title = "District population and rate of criminal governance", 
       subtitle = "Dotted line marks 1,000,000 residents") +
  theme_minimal()

ggsave("figure_2.pdf", height = 6, width = 10, bg = "white")

## Qualitative validation

# Load qualitative validation set
load("qual_coding.RData")

# Merge
validation_set <- estimates_gov %>%
  dplyr::select(district_code, crimgov_lb = crim_gov) %>%
  left_join(qual_coding) %>%
  # Only districts where we have qualitative codings
  drop_na(crimgov_qual)

# Estimate correlations
m1 <- feols(crimgov_lb ~ crimgov_qual, data = validation_set)
m2 <- feols(crimgov_lb ~ crimgov_qual | country, data = validation_set)
etable(m1, m2,
       cluster = ~ country,
       tex = T)

